package com.geccocrawler.gecco.demo.heilongjiang;

import com.geccocrawler.gecco.GeccoEngine;
import com.geccocrawler.gecco.annotation.Gecco;
import com.geccocrawler.gecco.annotation.HtmlField;
import com.geccocrawler.gecco.request.HttpGetRequest;
import com.geccocrawler.gecco.spider.HtmlBean;
import lombok.Getter;
import lombok.Setter;

import java.util.List;

/**
 * Crawler entry bean for Heilongjiang drug stores.
 *
 * <p>The bean is matched against the listing page {@code LIST_URL}; the
 * {@code categoryList} field is filled from the elements selected by the CSS
 * path declared on it. {@link #main(String[])} launches the crawl in two
 * consecutive phases.
 *
 * <p>Created with IntelliJ IDEA on 2019/6/3 at 11:28.
 *
 * @author weixiaohu
 */

@Gecco(matchUrl=DrugStoreSpider.LIST_URL)
@Getter
@Setter
public class DrugStoreSpider implements HtmlBean {

    /** Listing page this bean is matched against; also the seed URL of the first phase. */
    static final String LIST_URL = "http://www.hl.lss.gov.cn/hljsyb/list.jsp?type=yd";

    /** Package handed to {@code classpath(...)} on both engines. */
    private static final String SCAN_PACKAGE = "com.geccocrawler.gecco.demo.heilongjiang";

    /** Number of spider threads each engine is configured with. */
    private static final int SPIDER_THREADS = 3;

    /**
     * Pause a single spider takes after finishing one request.
     * NOTE(review): presumably milliseconds -- confirm against GeccoEngine#interval.
     */
    private static final int REQUEST_INTERVAL = 2000;

    /** Category entries selected from the listing page by the CSS path below. */
    @HtmlField(cssPath="body > table:nth-child(3) > tbody > tr:nth-child(2) > td > table:nth-child(1) > tbody > tr > td > div > ul")
    private List<Category> categoryList;

    /**
     * Launches the crawl: the category phase first, then the follow-up phase.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        crawlCategories();
        crawlRegions();
    }

    /**
     * Phase one: fetch the category list, starting from the listing page.
     * The seed request has its charset set to GBK, and the engine is launched
     * with {@code run()}.
     */
    private static void crawlCategories() {
        HttpGetRequest seed = new HttpGetRequest(LIST_URL);
        seed.setCharset("GBK");

        GeccoEngine.create()
                .classpath(SCAN_PACKAGE)
                // page the crawl starts from
                .start(seed)
                // how many spider threads to open
                .thread(SPIDER_THREADS)
                // pause of a single spider after each completed request
                .interval(REQUEST_INTERVAL)
                .run();
    }

    /**
     * Phase two: start a second engine seeded with
     * {@code RegionProcessPipeline.regionRequests}.
     * NOTE(review): presumably that list is filled while phase one runs --
     * verify against RegionProcessPipeline. This engine is launched with
     * {@code start()} rather than {@code run()}.
     */
    private static void crawlRegions() {
        GeccoEngine.create()
                .classpath(SCAN_PACKAGE)
                .thread(SPIDER_THREADS)
                .interval(REQUEST_INTERVAL)
                .start(RegionProcessPipeline.regionRequests)
                .start();
    }
}
